library(BiocManager)
library(Seurat)
library(tidyverse)
library(Matrix)
library(RCurl)
library(scales)
library(cowplot)
library(ggpubr)
library(ggplot2)
library(SingleCellExperiment)
library(AnnotationHub)
library(ensembldb)
library(tidyr)
library(dplyr)
library(Rsamtools)
library(multtest)
library(DESeq2)
library(gtools)
library(devtools)
library(sctransform)
library(reticulate)
library(stringr)
library(monocle)
library(topGO)
library(viridis)
library(MAST)
library(RColorBrewer)
library(M3Drop)
#sessionInfo()
#memory.limit(31000000)
rm(list=ls())
setwd("~/Desktop/rstudio/scRNA_scramble")
# Once you do the analysis once, you can just load in the RDS file and skip re-running everything.
#seurat_subset_labeled <- readRDS("~/Desktop/rstudio/scRNA_scramble/seurat_subset_labeled.rds")
seurat_integrated <- readRDS("~/Desktop/rstudio/scRNA_scramble/integrated_seurat.rds")
#annotation <- read.csv("~/Desktop/rstudio/scRNA_scramble/annotations.csv")
# Jointly visualizing the count and gene thresholds shows the joint filtering effect
# Do not remove doublets based on transcript threshholds
# UMI counts per cell should generally be above 500
#Folders in the directory are labeled with these name
file_list <- list("D8","D9","D10","D11","D12","E1","E2","E3")
# Create each individual Seurat object for every sample using a for loop
for (file in file_list){
seurat_data <- Read10X(data.dir = paste0("data/", file))
seurat_obj <- CreateSeuratObject(counts = seurat_data, min.features = 100, project = file, assay = "RNA")
assign(file, seurat_obj) }
OI_mice <- merge(x = D9, y = c(D12,E2,E3), add.cell.id = c("OI","OI","OI","OI"))
WT_mice <- RenameCells(E1, add.cell.id = c("WT"))
OE_mice <- merge(x = D8, y = D11, add.cell.id = c("OE","OE"))
RES_mice <- RenameCells(D10, add.cell.id = c("RES"))
merged_seurat <- merge(x = OI_mice, y = c(WT_mice,OE_mice,RES_mice))
# Check that it merged properly
# View(merged_seurat)
# head(merged_seurat@meta.data)
# tail(merged_seurat@meta.data)
# Add number of genes per UMI for each cell to metadata
merged_seurat$log10GenesPerUMI <- log10(merged_seurat$nFeature_RNA) / log10(merged_seurat$nCount_RNA)
# Compute percent mito ratio. ^mt- for mouse, ^MT- for human
merged_seurat$mitoRatio <- PercentageFeatureSet(object = merged_seurat, pattern = "^mt-")
merged_seurat$mitoRatio <- merged_seurat@meta.data$mitoRatio / 100
# Add metadata
merged_seurat$sample <- plyr::mapvalues(
x = merged_seurat$orig.ident,
from = c("D8", "D9", "D10", "D11", "D12", "E1", "E2", "E3"),
to = c("D8", "D9", "D10", "D11", "D12", "E1", "E2", "E3") )
merged_seurat$group <- plyr::mapvalues(
x = merged_seurat$orig.ident,
from = c("D8", "D9", "D10", "D11", "D12", "E1", "E2", "E3"),
to = c("OE", "OI", "RES", "OE", "OI", "WT", "OI", "OI") )
merged_seurat$orig.ident <- plyr::mapvalues(
x = merged_seurat$sample,
from = c("D8", "D9", "D10", "D11", "D12", "E1", "E2", "E3"),
to = c("D8", "D9", "D10", "D11", "D12", "E1", "E2", "E3") )
# Create metadata dataframe
metadata <- merged_seurat@meta.data
# Add cell IDs to metadata
metadata$cells <- rownames(metadata)
# Create sample column
metadata$sample <- NA
metadata$sample[which(str_detect(metadata$cells, "^OI_"))] <- "OI"
metadata$sample[which(str_detect(metadata$cells, "^WT_"))] <- "WT"
metadata$sample[which(str_detect(metadata$cells, "^OE_"))] <- "OE"
metadata$sample[which(str_detect(metadata$cells, "^RES_"))] <- "RES"
# Rename columns
#metadata <- metadata %>% dplyr::rename(seq_folder = orig.ident, nCount_RNA = nUMI, nFeature_RNA = nGene)
# Add metadata back to Seurat object
merged_seurat@meta.data <- metadata
# Create .RData object to load at any time
save(merged_seurat, file="~/Desktop/rstudio/scRNA_scramble/merged_seurat.RData")
# Remove raw data to clear up memory
rm("D8");rm("D9");rm("D10");rm("D11");rm("D12");rm("E1");rm("E2");rm("E3")
rm(seurat_data);rm(seurat_obj)
Do samples cluster by tissue type (endo vs. calv)? Do treatment groups cluster (WT, OI, OE, RES)?
merged_seurat <- ScaleData(merged_seurat)
merged_seurat <- FindVariableFeatures(merged_seurat, selection.method = "vst", nfeatures = 1000, verbose = F)
# Run PCA
merged_seurat <- RunPCA(object = merged_seurat)
# Plot PCA
PCAPlot(merged_seurat, split.by = "group")
## Plot Metrics Use this to determine thresholds for filtering. The HBC recommends using combinatorial factors for the highest quality filtering.
e1 <- metadata %>% ggplot(aes(x=group, fill=group)) +
geom_bar() + theme_classic() + ggtitle("Cell Count by Group") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) + xlab("")
e1
metadata %>% ggplot(aes(color=group, x=nCount_RNA, fill=group)) +
geom_density(alpha = 0.2) + scale_x_log10() +
theme_classic() + ylab("Cell density") + geom_vline(xintercept = 500)
Plot the number of genes vs the number of UMIs coloured by the fraction of mitochondrial reads Cells that are poor quality are likely to have low genes and UMIs per cell, and correspond to the data points in the bottom left quadrant of the plot. Good cells will generally exhibit both higher number of genes per cell and higher numbers of UMIs.
metadata %>% ggplot(aes(color=group, x=nFeature_RNA, fill= group)) +
geom_density(alpha = 0.2) + theme_classic() +
scale_x_log10() + geom_vline(xintercept = 300)
With this plot we also evaluate the slope of the line, and any scatter of data points in the bottom right hand quadrant of the plot. These cells have a high number of UMIs but only a few number of genes. These could be dying cells, but also could represent a population of a low complexity celltype (i.e red blood cells).
metadata %>% ggplot(aes(x=group, y=log10(nFeature_RNA), fill=group)) +
geom_boxplot() + theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells vs nFeatures")
Determine whether strong presence of cells with low numbers of genes/UMIs. This metric can identify whether there is a large amount of mitochondrial contamination from dead or dying cells.
metadata %>% ggplot(aes(x=nCount_RNA, y=nFeature_RNA, color=mitoRatio)) + geom_point() + stat_smooth(method=lm) +
scale_colour_gradient(low = "gray90", high = "black") + facet_wrap(~group) +
stat_smooth(method=lm) + scale_x_log10() + scale_y_log10() + theme_classic() +
geom_vline(xintercept = 500) + geom_hline(yintercept = 250)
Poor quality samples for mitochondrial counts as cells will surpass the 0.2 mitochondrial ratio mark
metadata %>% ggplot(aes(color=group, x=mitoRatio, fill=group)) +
geom_density(alpha = 0.2) + scale_x_log10() +
theme_classic() + geom_vline(xintercept = 0.2)
Outlier cells in these samples might be cells that have a less complex RNA species than other cells. Sometimes we can detect contamination with low complexity cell types like red blood cells via this metric. Generally, we expect the novelty score to be above 0.80.
metadata %>% ggplot(aes(x=log10GenesPerUMI, color = group, fill=group)) +
geom_density(alpha = 0.2) + theme_classic() + geom_vline(xintercept = 0.8)
# Filter out low quality reads using selected thresholds - these will change with experiment
filtered_seurat <- subset(x = merged_seurat, subset = (nCount_RNA >= 500) & (nFeature_RNA >= 250) &
(log10GenesPerUMI > 0.80) & (mitoRatio < 0.20))
# Extract counts
counts <- GetAssayData(object = filtered_seurat, slot = "counts")
counts2 <- GetAssayData(object = merged_seurat, slot = "counts")
# Output a logical matrix specifying for each gene on whether or not there are more than zero counts per cell
nonzero <- counts > 0
nonzero2 <- counts2 > 0
# Sums all TRUE values and returns TRUE if more than 10 TRUE values per gene
keep_genes <- Matrix::rowSums(nonzero) >= 10
keep_genes2 <- Matrix::rowSums(nonzero2) >= 10
# Only keeping those genes expressed in more than 10 cells
filtered_counts <- counts[keep_genes, ]
filtered_counts2 <- counts[keep_genes2, ]
# Reassign to filtered Seurat object
filtered_seurat <- CreateSeuratObject(filtered_counts, meta.data = filtered_seurat@meta.data)
unfiltered_seurat <- CreateSeuratObject(filtered_counts2, meta.data = merged_seurat@meta.data)
# Save filtered subset to new metadata
metadata_clean <- filtered_seurat@meta.data
metadata <- merged_seurat@meta.data
f1 <- metadata %>% ggplot(aes(x=group, fill=group)) + xlab("") +
geom_bar() + theme_classic() + ggtitle("Unfiltered Cells") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) + xlab("")
f2 <- metadata_clean %>% ggplot(aes(x=group, fill=group)) + xlab("") +
geom_bar() + theme_classic() + ggtitle("Filtered Cells") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) + xlab("")
ggarrange(f1,f2, ncol =2, nrow = 1)
UMI counts (transcripts) per cell: do you observe the removal of cells with less than 500 UMI?
m1 <- metadata %>% ggplot(aes(color=group, x=nCount_RNA, fill= group)) +
geom_density(alpha = 0.2) + scale_x_log10() + xlab("") +
theme_classic() + ylab("Unfiltered Cell density") + geom_vline(xintercept = 500)
m2 <- metadata_clean %>% ggplot(aes(color=group, x=nCount_RNA, fill= group)) +
geom_density(alpha = 0.2) + scale_x_log10() + xlab("") +
theme_classic() + ylab("Filtered Cell density") + geom_vline(xintercept = 500)
ggarrange(m1,m2, ncol =2, nrow = 1)
Genes detected per cell: do you observe the removal of cells with less than 250 genes?
m21 <- metadata %>% ggplot(aes(color=group, x=nFeature_RNA, fill= group)) +
geom_density(alpha = 0.2) + theme_classic() + ggtitle("Unfiltered") +
scale_x_log10() + geom_vline(xintercept = 300) + xlab("")
m22 <- metadata_clean %>% ggplot(aes(color=group, x=nFeature_RNA, fill= group)) +
geom_density(alpha = 0.2) + theme_classic() + ggtitle("Filtered") +
scale_x_log10() + geom_vline(xintercept = 300) + xlab("")
ggarrange(m21,m22, ncol =2, nrow = 1)
Do you observe cells with a high number of UMIs but only a few number of genes?
m31 <- metadata %>% ggplot(aes(x=nCount_RNA, y=nFeature_RNA, color=mitoRatio)) +
geom_point() + stat_smooth(method=lm) + xlab("") +
scale_colour_gradient(low = "gray90", high = "black") +
stat_smooth(method=lm) + scale_x_log10() + ggtitle("Unfiltered") +
scale_y_log10() + theme_classic() + facet_wrap(~group) +
geom_vline(xintercept = 500) + geom_hline(yintercept = 250)
m32 <- metadata_clean %>% ggplot(aes(x=nCount_RNA, y=nFeature_RNA, color=mitoRatio)) +
geom_point() + stat_smooth(method=lm) + xlab("") +
scale_colour_gradient(low = "gray90", high = "black") +
stat_smooth(method=lm) + scale_x_log10() + ggtitle("Filtered") +
scale_y_log10() + theme_classic() + facet_wrap(~group) +
geom_vline(xintercept = 500) + geom_hline(yintercept = 250)
ggarrange(m31,m32, ncol =2, nrow = 1)
Visualize the distribution of mitochondrial gene expression detected per cell. Poor quality samples for mitochondrial counts as cells will surpass the 0.2 mitochondrial ratio mark. Do you observe the removal of cells with more than 0.2 mitochondrial counts ratio?
# Compute percent mito ratio
#merged_seurat$mitoRatio <- PercentageFeatureSet(object = merged_seurat, pattern = "^MT-")
#merged_seurat$mitoRatio <- merged_seurat@meta.data$mitoRatio / 100
filtered_seurat$log10GenesPerUMI <- log10(filtered_seurat$nFeature_RNA) / log10(filtered_seurat$nCount_RNA)
filtered_seurat$mitoRatio <- PercentageFeatureSet(object = filtered_seurat, pattern = "^MT-")
filtered_seurat$mitoRatio <- filtered_seurat@meta.data$mitoRatio / 100
m41 <- metadata %>% ggplot(aes(color=group, x=mitoRatio, fill=group)) + ggtitle("Unfiltered") +
geom_density(alpha = 0.2) + scale_x_log10() + xlab("") +
theme_classic() + geom_vline(xintercept = 0.2)
m42 <- metadata_clean %>% ggplot(aes(color=group, x=mitoRatio, fill=group)) + ggtitle("Filtered") +
geom_density(alpha = 0.2) + scale_x_log10() + xlab("") +
theme_classic() + geom_vline(xintercept = 0.2)
ggarrange(m41,m42, ncol =2, nrow = 1)
Outlier cells in these samples might be cells that have a less complex RNA species than other cells. Generally, we expect the novelty score to be above 0.80. Do you observe the removal of cells with less than 0.8 log10GenesPerUMI?
# Visualize the overall complexity of the gene expression by visualizing the genes detected per UMI
plot1 <- metadata %>% ggplot(aes(x=log10GenesPerUMI, color=group, fill=group)) + xlim(0.5,1) +
geom_density(alpha = 0.2) + theme_classic() + geom_vline(xintercept = 0.8)
plot2 <- metadata_clean %>% ggplot(aes(x=log10GenesPerUMI, color=group, fill=group)) + xlim(0.5,1) +
geom_density(alpha = 0.2) + theme_classic() + geom_vline(xintercept = 0.8)
ggarrange(plot1,plot2, ncol =2, nrow = 1)
This step of the workflow involves exploring our data to identify which covariates we would like to regress out, and removing unwanted sources of variation. To perform PCA, we need to first choose the most variable features, then scale the data.
# Normalize the counts
seurat_phase <- NormalizeData(filtered_seurat)
cell_cycle_genes <- read.csv("Mus_musculus_cell_cycle.csv")
View(cell_cycle_genes)
cell_cycle_genes <- cell_cycle_genes[,1:3]
g2m_genes <- cell_cycle_genes[1:54,]
s_genes <- cell_cycle_genes[55:97,]
# Extract Ensembldb database info
ah <- AnnotationHub()
ahDb <- query(ah, pattern = c("Mus musculus", "EnsDb"))
id <- ahDb %>% mcols() %>% rownames() %>% tail(n = 1)
edb <- ah[[id]] #organism(edb)
annotations <- ensembldb::genes(edb, return.type = "data.frame")
annotations <- annotations %>% dplyr::select(gene_id, gene_name, description)
annotations_cc <- ensembldb::genes(edb, return.type = "data.frame") %>%
dplyr::select(gene_id, gene_name) %>%
dplyr::filter(gene_id %in% cell_cycle_genes$gene)
# Get gene names for Ensembl IDs for each gene
cell_cycle_markers <- dplyr::left_join(cell_cycle_genes, annotations_cc, by = c("geneID" = "gene_id"))
# Acquire the phase genes
s_genes <- cell_cycle_markers %>% dplyr::filter(phase == "S") %>% pull("gene_name")
g2m_genes <- cell_cycle_markers %>% dplyr::filter(phase == "G2/M") %>% pull("gene_name")
write.csv(annotations, "~/Desktop/rstudio/scRNA_scramble/annotation.csv")
write.csv(annotations_cc, "~/Desktop/rstudio/scRNA_scramble/annotations_cell_cycle_genes.csv")
# Load cell cycle markers
# load("data/cycle.rda")
# cycle <- load("data/cycle.rda")
# Score cells for cell cycle
seurat_phase <- CellCycleScoring(seurat_phase, s.features = s_genes, g2m.features = g2m_genes, set.ident = TRUE)
# View cell cycle scores and phases assigned to cells
View(seurat_phase@meta.data)
# Identify the most variable genes
seurat_phase <- FindVariableFeatures(seurat_phase, selection.method = "vst", nfeatures = 1000, verbose = F)
# Scale the counts
seurat_phase <- ScaleData(seurat_phase)
# Perform PCA
seurat_phase <- RunPCA(seurat_phase)
# Plot the PCA colored by cell cycle phase
DimPlot(seurat_phase,reduction = "pca", group.by= "Phase", split.by = "Phase")
Since we have two samples in our dataset, we want to keep them as separate objects and transform them as that is what is required for integration. First split the cells in filtered_seurat object into “Control” and “Stimulated”
# Split seurat object by condition to perform cell cycle scoring and SCT on all samples
split_seurat <- SplitObject(filtered_seurat, split.by = "group")
split_seurat <- split_seurat[c("OE","OI","WT","RES")]
#split_seurat <- split_seurat[c("endo", "calv")]
The Sctransform function provides advanced normalization and variance stabilization of the data, and also regresses out sources of unwanted variation in our data. The sctransform method models the UMI counts using a regularized negative binomial model to remove the variation due to sequencing depth (total nCounts per cell).
# Normalize the counts
options(future.globals.maxSize = 4000 * 1024^2)
for (i in 1:length(split_seurat)) {
split_seurat[[i]] <- NormalizeData(split_seurat[[i]], verbose = TRUE)
split_seurat[[i]] <- CellCycleScoring(split_seurat[[i]], g2m.features=g2m_genes, s.features=s_genes)
split_seurat[[i]] <- SCTransform(split_seurat[[i]], vars.to.regress = c("mitoRatio")) } # Regress Phase for cell cycle
# UMI is automatically being regressed out, no need to specify it here.
# rRNA is included in the mitoRatio, no need to regress that out separately.
# Check which assays are stored in objects
#split_seurat$OI@assays
#split_seurat$WT@assays
If we want to ultimately compare celltype expression between groups it is recommended to integrate the data.
# Select the most variable features to use for integration
integ_features <- SelectIntegrationFeatures(object.list = split_seurat, nfeatures = 2000)
# Prepare the SCT list object for integration
split_seurat <- PrepSCTIntegration(object.list = split_seurat, anchor.features = integ_features)
# Find best buddies - can take a while to run
integ_anchors <- FindIntegrationAnchors(object.list = split_seurat, normalization.method = "SCT", anchor.features = integ_features)
# Integrate across conditions
seurat_integrated <- IntegrateData(anchorset = integ_anchors, normalization.method = "SCT")
# Run PCA
seurat_integrated <- RunPCA(object = seurat_integrated)
# Plot PCA
PCAPlot(seurat_integrated, split.by = "group")
# Run UMAP
seurat_integrated <- RunUMAP(seurat_integrated, dims = 1:10, reduction = "pca")
# Plot UMAP
DimPlot(seurat_integrated)
# Save integrated seurat object
saveRDS(seurat_integrated, "~/Desktop/rstudio/scRNA_scramble/integrated_seurat.rds")
# Explore heatmap of PCs
DimHeatmap(seurat_integrated, dims = 1:9, cells = 500, balanced = TRUE) # make 500 - 1000
# Printing out the most variable genes driving PCs
print(x = seurat_integrated[["pca"]], dims = 1:9, nfeatures = 5)
pca_loadings <- Loadings(seurat_integrated[["pca"]])
#pca_loadings <- seurat_integrated@reductions$pca@feature.loadings[,1]
PCs 8 - 10
ElbowPlot(object = seurat_integrated, ndims = 10)
Resampling Tests for Clustering. HBC does not use JackStraw, as it doesn’t really help make decisions about the number of dimensions to use.
## Resampling test inspired by the jackStraw procedure (MEMORY INTENSIVE!)
seurat_integrated <- JackStraw(object = seurat_integrated, reduction = "pca", dims = 10, num.replicate = 25, prop.freq = 0.1, verbose = FALSE)
The JackStrawPlot displays the distribution of p-values for each PC with a uniform distribution (dashed line). ‘Significant’ PCs will show a strong enrichment of genes with low p-values (solid curve above the dashed line). In this case it appears that PCs 1-10 are significant.
seurat_integrated <- ScoreJackStraw(object = seurat_integrated, dims = 1:10, reduction = "pca")
JackStrawPlot(object = seurat_integrated, dims = 1:10, reduction = "pca")
###################################
Seurat assigns cells to clusters based on their PCA scores derived from the expression of the integrated most variable genes.
# Determine the K-nearest neighbor graph
seurat_integrated <- FindNeighbors(object = seurat_integrated, dims = 1:10, annoy.metric = "manhattan")
# Determine the clusters for various resolutions
seurat_integrated <- FindClusters(object = seurat_integrated, resolution = c(0.5)) #res series
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 13707
## Number of edges: 420047
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9224
## Number of communities: 17
## Elapsed time: 0 seconds
Seurat assigns cells to clusters based on their PCA scores derived from the expression of the integrated most variable genes.
# Determine the K-nearest neighbor graph
seurat_integrated <- FindNeighbors(object = seurat_integrated, dims = 1:10, annoy.metric = "manhattan")
# Determine the clusters for various resolutions
seurat_integrated <- FindClusters(object = seurat_integrated, resolution = c(0.5)) #res series
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 13707
## Number of edges: 420047
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9224
## Number of communities: 17
## Elapsed time: 0 seconds
# Explore resolutions
seurat_integrated@meta.data %>%
View()
# Assign identity of clusters
#Idents(object = seurat_integrated) <- "assigned_clusters"
# Plot the UMAP
DimPlot(seurat_integrated, reduction = "umap", label = TRUE, label.size = 4)
StashIdent(object = seurat_integrated, save.name = "assigned_idents")
## An object of class Seurat
## 35885 features across 13707 samples within 3 assays
## Active assay: integrated (2000 features, 2000 variable features)
## 2 other assays present: RNA, SCT
## 2 dimensional reductions calculated: pca, umap
Idents(object = seurat_integrated) <- "integrated_snn_res.0.5"
# Plot the UMAP
DimPlot(seurat_integrated, reduction = "umap", label = TRUE, label.size = 4)
# Extract identity and sample information from seurat object to determine the number of cells per cluster per sample
n_cells <- FetchData(seurat_integrated, vars = c("ident", "orig.ident")) %>%
dplyr::count(ident, orig.ident) %>% tidyr::spread(ident, n)
# View table
View(n_cells)
# UMAP of cells in each cluster by sample
DimPlot(seurat_integrated, label = TRUE, split.by = "group") + NoLegend()
Make sure you have all clusters in all stages, no clusters appearing in just one phase (unless its biologically relevant, as with stem cells) As a reminder, G1 is the growth phase. S is DNA synthesis phase. G2M is the mitotic check point control and cell division.
# Explore whether clusters segregate by cell cycle phase
DimPlot(seurat_integrated, label = TRUE, split.by = "Phase") + NoLegend()
#We do not see much clustering by cell cycle score, so we can proceed with the QC.
# Determine metrics to plot present in seurat_integrated@meta.data
metrics <- c("nCount_RNA", "nFeature_RNA", "G2M.Score", "mitoRatio")
FeaturePlot(seurat_integrated, reduction = "umap", features = metrics, pt.size = 0.4,
sort.cell = TRUE, min.cutoff = 'q10', label = TRUE)
# Defining the information in the seurat object of interest
columns <- c(paste0("PC_", 1:5), "ident","UMAP_1", "UMAP_2")
# Extracting this data from the seurat object
pc_data <- FetchData(seurat_integrated, vars = columns)
# Extract the UMAP coordinates for the first 10 cells
seurat_integrated@reductions$umap@cell.embeddings[1:5, 1:2]
## UMAP_1 UMAP_2
## OE_AAACCCAAGATCACCT-1_1 0.9633908 10.8307542
## OE_AAACCCAAGCCTTCTC-1_1 -5.7255205 0.5835256
## OE_AAACCCATCCAAGGGA-1_1 4.2135297 1.4773312
## OE_AAACCCATCTAGCCTC-1_1 -6.2587794 0.8183785
## OE_AAACGAAAGACCACGA-1_1 2.2734665 10.9859427
# Adding cluster label to center of cluster on UMAP
umap_label <- FetchData(seurat_integrated,
vars = c("ident", "UMAP_1", "UMAP_2")) %>% group_by(ident) %>% summarise(x=mean(UMAP_1), y=mean(UMAP_2))
# Plotting a UMAP plot for each of the PCs
map(paste0("PC_", 1:5), function(pc){
ggplot(pc_data, aes(UMAP_1, UMAP_2)) + geom_point(aes_string(color=pc), alpha = 0.7) +
scale_color_gradient(guide = FALSE, low = "grey90", high = "blue") + xlab("") + ylab("") +
geom_text(data=umap_label, aes(label=ident, x, y)) + ggtitle(pc) }) %>% plot_grid(plotlist = .)
# Examine PCA results
print(seurat_integrated[["pca"]], dims = 1:6, nfeatures = 10)
## PC_ 1
## Positive: Col1a1, Col1a2, Sparc, Ibsp, Spp1, Col11a1, Ifitm5, Serpinf1, Bglap, Col5a2
## Negative: Tmsb4x, Tyrobp, Fcer1g, Lyz2, Srgn, Cxcl2, Coro1a, Slfn2, Cd14, Cd52
## PC_ 2
## Positive: Ibsp, Col1a1, Col1a2, Spp1, Ifitm5, Bglap, Col11a2, Sgms2, Sparc, Cgref1
## Negative: Igfbp7, Col4a1, Col4a2, Mgp, Sparcl1, Gng11, Cd34, Esam, Egfl7, Cav1
## PC_ 3
## Positive: Ibsp, Spp1, Ifitm5, Col1a2, Col1a1, Bglap, Col11a2, Pecam1, Col4a1, Egfl7
## Negative: Ptn, Igfbp5, 1500015O10Rik, Mgp, Itm2a, Col3a1, Aspn, Matn4, Cdkn1c, Sfrp2
## PC_ 4
## Positive: Stmn1, Hmgb2, Hist1h2ap, H2afz, Birc5, Top2a, Hist1h1b, 2810417H13Rik, Ube2c, Cenpf
## Negative: Col1a2, Col1a1, Mgp, 1500015O10Rik, Sparc, Matn4, Col14a1, Igfbp5, Cdkn1c, Rbp1
## PC_ 5
## Positive: S100a8, S100a9, Gm5483, BC100530, Pglyrp1, Stfa1, Hp, Wfdc21, Stfa2l1, Stfa2
## Negative: Apoe, C1qb, C1qa, Pf4, C1qc, Mrc1, Csf1r, Ms4a7, Lgmn, Mt1
## PC_ 6
## Positive: Postn, Ptn, Aspn, Acta2, Tnn, Col3a1, Lum, Tnc, Mmp13, Tagln
## Negative: 1500015O10Rik, Rbp1, Col14a1, Col1a2, Matn4, Mgp, Col1a1, Cdkn1c, Eln, Igf1
DimPlot(object = seurat_integrated, reduction = "umap", label = TRUE) + NoLegend()
# Select the RNA counts slot to be the default assay
# The SCTransform normalization was performed only on the 3000 most variable genes, so many of our genes of interest may not be present in the SCT slot, but is in the RNA slot.
DefaultAssay(seurat_integrated) <- "RNA"
# Normalize RNA data for visualization purposes
seurat_integrated <- NormalizeData(seurat_integrated, verbose = FALSE)
For evaluating a single sample group/condition. This compares each cluster against all other clusters to identify potential marker genes.
# Find markers for every cluster compared to all remaining cells, report only the positive ones
markers <- FindAllMarkers(object = seurat_integrated, only.pos = TRUE, logfc.threshold = 0.30, min.diff.pct = 0.10, min.pct = 0.15)
View(markers)
write.csv(markers, "~/Desktop/rstudio/jacobsen/results/markers_all_clusters.csv")
# min.diff.pct = minimum percent difference between the percent of cells expressing the gene in the cluster
# min.pct = the percent of cells expressing gene in all other clusters combined. higher threshold will filter more genes
Since we have samples representing different conditions in our dataset, our best option is to find conserved markers. This function internally separates out cells by sample group/condition, and then performs differential gene expression testing for a single specified cluster against all other clusters
memory.limit(31000) # increase the memory limit to 31 GB
#DefaultAssay(seurat_integrated) <- "RNA" #For clustering use raw values
#DefaultAssay(seurat_integrated) <- "integrated"
#ident.2 is your cluster of interest. Lower the min.pct threshold to reduce memory usage
cluster2_conserved_markers <- FindConservedMarkers(seurat_integrated, ident.1 = 10, grouping.var = "group",
only.pos = TRUE, logfc.threshold = 0.30, min.diff.pct = 0.10, min.pct = 0.15)
head(cluster2_conserved_markers)
annotations <- read.csv("annotations.csv")
cluster2_annot_markers <- cluster2_conserved_markers %>% rownames_to_column(var="gene") %>%
left_join(y = unique(annotations[, c("gene_name", "description")]), by = c("gene" = "gene_name"))
head(cluster2_annot_markers)
## gene WT_p_val WT_avg_logFC WT_pct.1 WT_pct.2 WT_p_val_adj
## 1 Gpr132 1.364547e-127 1.323361 0.656 0.002 2.314544e-123
## 2 Napsa 9.837394e-116 1.703932 0.625 0.003 1.668619e-111
## 3 Cybb 6.691081e-110 2.323785 0.812 0.016 1.134941e-105
## 4 Cytip 1.495931e-109 1.940442 0.719 0.009 2.537399e-105
## 5 Ctss 3.818149e-99 1.933767 0.812 0.020 6.476345e-95
## 6 Ptprc 6.738125e-99 2.083188 0.844 0.023 1.142921e-94
## OE_p_val OE_avg_logFC OE_pct.1 OE_pct.2 OE_p_val_adj RES_p_val
## 1 4.508454e-59 0.7167272 0.706 0.179 7.647239e-55 1.563163e-277
## 2 1.153627e-53 0.8635907 0.868 0.372 1.956783e-49 1.755170e-306
## 3 2.513203e-27 0.5702192 0.939 0.526 4.262895e-23 5.385926e-181
## 4 2.293428e-70 1.1339542 0.964 0.479 3.890112e-66 0.000000e+00
## 5 6.053331e-131 1.6726717 0.970 0.206 1.026766e-126 4.166122e-201
## 6 1.433232e-26 0.4835575 0.970 0.711 2.431048e-22 7.573534e-170
## RES_avg_logFC RES_pct.1 RES_pct.2 RES_p_val_adj OI_p_val OI_avg_logFC
## 1 1.067480 0.617 0.004 2.651436e-273 0 0.7632887
## 2 1.546911 0.633 0.002 2.977119e-302 0 1.1970547
## 3 1.750986 0.633 0.014 9.135607e-177 0 1.6992372
## 4 2.147151 0.800 0.008 0.000000e+00 0 1.6496976
## 5 2.143447 0.883 0.031 7.066577e-197 0 1.7567123
## 6 1.633456 0.800 0.030 1.284623e-165 0 1.6041024
## OI_pct.1 OI_pct.2 OI_p_val_adj max_pval minimump_p_val
## 1 0.308 0.004 0 4.508454e-59 0
## 2 0.415 0.005 0 1.153627e-53 0
## 3 0.509 0.021 0 2.513203e-27 0
## 4 0.500 0.010 0 2.293428e-70 0
## 5 0.701 0.020 0 3.818149e-99 0
## 6 0.719 0.025 0 1.433232e-26 0
## description
## 1 G protein-coupled receptor 132 [Source:MGI Symbol;Acc:MGI:1890220]
## 2 napsin A aspartic peptidase [Source:MGI Symbol;Acc:MGI:109365]
## 3 cytochrome b-245, beta polypeptide [Source:MGI Symbol;Acc:MGI:88574]
## 4 cytohesin 1 interacting protein [Source:MGI Symbol;Acc:MGI:2183535]
## 5 cathepsin S [Source:MGI Symbol;Acc:MGI:107341]
## 6 protein tyrosine phosphatase, receptor type, C [Source:MGI Symbol;Acc:MGI:97810]
get_conserved <- function(cluster){
FindConservedMarkers(seurat_integrated, ident.1 = 5, grouping.var = "group", only.pos = TRUE) %>%
rownames_to_column(var = "gene") %>%
left_join(y = unique(annotations[, c("gene_name", "description")]), by = c("gene" = "gene_name")) %>%
cbind(cluster_id = cluster, .) }
# Iterate function across desired clusters
#conserved_markers <- map_dfr(c(7,20), get_conserved) For contrast between clusters 7 and 20
conserved_markers <- map_dfr(c(0:16), get_conserved)
# Extract top 10 markers per cluster
top20 <- conserved_markers %>% mutate(avg_fc = (OI_avg_logFC + OE_avg_logFC + WT_avg_logFC + RES_avg_logFC) / 4) %>%
group_by(cluster_id) %>% top_n(n = 20, wt = avg_fc)
# Visualize top 10 markers per cluster
View(top20)
write.csv(top20, "~/Desktop/rstudio/jacobsen/results/top20_OI.csv")
FeaturePlot(seurat_integrated, reduction = "umap",
features = c("Aspn","Ptn","Thbs2","Ogn","Mfap4"),
sort.cell = TRUE, min.cutoff = 'q10', label = TRUE)
FeaturePlot(seurat_integrated, reduction = "umap", features = c("Gphg2","Matn4","Igf1","Mfap4"),
sort.cell = TRUE, min.cutoff = 'q10', label = TRUE)
FeaturePlot(seurat_integrated, reduction = "umap", features = c("Acan", "Col2a1", "Runx2"),
label = TRUE, sort.cell = TRUE, min.cutoff = 'q10', repel = TRUE)
#Mfap4-
FeaturePlot(seurat_integrated, features = c("Cd34","Col1a1", "Mfap4","Mlip","Tcf7","Tnc","Panx3","Pth1r"),
sort.cell = TRUE, min.cutoff = 'q10', label = TRUE, repel = TRUE)
FeaturePlot(seurat_integrated, features = c("Bglap", "Col1a1", "Dmp1","Ifitm5"),
sort.cell = TRUE, min.cutoff = 'q10', label = TRUE, repel = TRUE)
### LateOB
FeaturePlot(seurat_integrated, features = c("Col1a1","Dmp1","Tnfrsf11b"),
sort.cell = TRUE, min.cutoff = 'q10', label = TRUE, repel = TRUE)
FeaturePlot(seurat_integrated, features = c("Birc5", "Cenpf",
"Top2a","Pcr1","Bex2","Ank3","Celf2","Cap43","Nav2"), sort.cell = TRUE,
min.cutoff = 'q10', label = TRUE, repel = TRUE)
FeaturePlot(seurat_integrated, features = c("Pdgfra", "Ly6a", "Igfbp6"),
sort.cell = TRUE, min.cutoff = 'q10', label = TRUE, repel = TRUE)
### Endothelial
FeaturePlot(seurat_integrated, features = c("Pecam1","Col4a1","Esam"),
sort.cell = TRUE, min.cutoff = 'q10', label = TRUE, repel = TRUE)
FeaturePlot(seurat_integrated, features = c("Rgs5","Kcnj8","Acta2", "Notch3"),
sort.cell = TRUE, min.cutoff = 'q10', label = TRUE, repel = TRUE)
### Plot gene markers paS cells
FeaturePlot(seurat_integrated, reduction = "umap", features = c("Pdgfra", "Ly6a", "Igfbp6"),
label = TRUE, sort.cell = TRUE, min.cutoff = 'q10', repel = TRUE)
### Chondrocyte
FeaturePlot(seurat_integrated, reduction = "umap",
features = c("Acan","Sox9","Lect1","Matn1","Col9a1", "Col2a1", "3110079O15Rik"),
label = TRUE, sort.cell = TRUE, min.cutoff = 'q10', repel = TRUE)
### Hematapoetic
FeaturePlot(seurat_integrated, features = c("Cd68","Cd14","Cd48","Cd244","Kit","Cd38","Ptprc", "Fcrla"),
sort.cell = TRUE, min.cutoff = 'q10', label = TRUE, repel = TRUE)
FeaturePlot(seurat_integrated, reduction = "umap", features = c("Cd68", "Cd14", "Csf1r",
"Ptprc","Acp5","Siglec1","Cd45","Fcgr3", "Ccr5"),
label = TRUE, sort.cell = TRUE, min.cutoff = 'q10', repel = TRUE)
FeaturePlot(seurat_integrated, reduction = "umap", features = c("Pecam1","Col4a1"),
label = TRUE, sort.cell = TRUE, min.cutoff = 'q10', repel = TRUE)
FeaturePlot(seurat_integrated, reduction = "umap", features = c("Col1a1"),
label = TRUE, sort.cell = TRUE, min.cutoff = 'q10', repel = TRUE)
VlnPlot(object = seurat_integrated, group.by = "group",
features = c("Atf5", "Atf4","Hspa5", "Hspa9", "Trib3","Col1a1"), pt.size = 0.001, sort=TRUE)
#var1 <- FetchData(seurat_integrated, vars = c('groups',"Atf5"))
var1 <- FetchData(seurat_integrated, vars = c("Atf5", "Atf4","Hspa5", "Hspa9", "Trib3","Col1a1"))
head(var1)
## Atf5 Atf4 Hspa5 Hspa9 Trib3 Col1a1
## OE_AAACCCAAGATCACCT-1_1 0 1.4555337 2.0246608 0.9720604 0 0.9720604
## OE_AAACCCAAGCCTTCTC-1_1 0 0.0000000 1.4353284 0.0000000 0 0.0000000
## OE_AAACCCATCCAAGGGA-1_1 0 2.5402028 0.0000000 0.0000000 0 0.0000000
## OE_AAACCCATCTAGCCTC-1_1 0 2.1260012 0.5989149 0.0000000 0 0.0000000
## OE_AAACGAAAGACCACGA-1_1 0 0.7237965 2.5403985 0.0000000 0 0.0000000
## OE_AAACGAATCCGAGCTG-1_1 0 0.0000000 1.5124741 0.0000000 0 1.0184762
VlnPlot(object = seurat_integrated, group.by = "group", features = c("Cox6a2", "Eif3c", "Slc7a3", "Col15a1","Dcn","Wif1","Hbba1"))
VlnPlot(object = seurat_integrated, group.by = "group", features = c("Canx","Hspa5","Pdia3","Ddit3","Eif2ak3","Ero1l"), pt.size=0)
VlnPlot(object = seurat_integrated, group.by = "group", features = c("Col3a1","Rgs5","Kcnj8","Abcc9"))
# Determine differentiating markers for cluster 1 cells
# cluster3_markers <- FindMarkers(seurat_integrated, ident.1 = "3", ident.2 = 4, logfc.threshold = log(2))
# Add gene symbols to the DE table
# cluster3_markers <- cluster3_markers %>% rownames_to_column(var = "gene") %>%
# left_join(y = unique(annotations[, c("gene_name", "description")]), by = c("gene" = "gene_name"))
# Reorder columns and sort by padj
# cluster3_markers <- cluster3_markers[, c(1, 3:5,2,6:7)]
# cluster3_markers <- cluster3_markers %>% dplyr::arrange(p_val_adj)
# View data
# View(cluster3_markers)
# Save integrated seurat object
saveRDS(seurat_integrated, "~/Desktop/rstudio/jacobsen/integrated_seurat.rds")
# Save idents
#seurat_integrated@meta.data[["assigned_clusters"]] <- StashIdent(object = seurat_integrated@active.ident, save.name = "orig.ident")
#seurat_integrated[[1]][["assigned_cluster"]] <- Idents(object = seurat_integrated[[1]])
seurat_integrated <- StashIdent(object = seurat_integrated, save.name = "assigned_ident_orig")
# Restore numerical Cluster IDs
Idents(object = seurat_integrated) <- "integrated_snn_res.0.5"
# Set More Specific Identities
seurat_integrated <- RenameIdents(object = seurat_integrated,
"0" = "PreOB", # Apsn, Ptn, Thbs2, Ogn, Mfap4
"1" = "MatureOB", # Bglap, Col1a1, Dmp1, Ifitm5
"2" = "MatureOB", # Bglap, Col1a1, Dmp1, Ifitm5
"3" = "MSC", # Birc5, Cenpf, Top2a, Pcr1, Bex2, Ank3, Celf2, Cap43, Nav2
"4" = "Connective", # Pdgfra, Ly6a, Igfbp6
"5" = "PreOB_Chondrolike", # Gphg2, Matn4, Igf1 Mfap4
"6" = "Macrophage", # Cd68, Cd14, Csf1r, Ptprc, Acp5, Siglec1, Cd45, Fcgr3, Ccr5
"7" = "PreOB", # Apsn, Ptn, Thbs2, Ogn, Mfap4
"8" = "LateOB", # Col1a1, Dmp1, Tnfrsf11b
"9" = "Endothelial", # Pecam1, Col4a1, Esam
"10" = "MSC", # Birc5, Cenpf, Top2a, Pcr1, Bex2, Ank3, Celf2, Cap43, Nav2
"11" = "ImmatureOB", # Col1a1, Mfap4-, Mlip, Tcf7, Tnc, Panx3, Pth1r
"12" = "α-SMA+", # Rgs5, Kcnj8, Notch3, Acta2
"13" = "pAS", # Pdgfra, Ly6a, Igfbp6
"14" = "Chondrocyte", # Acan, Sox9, Lect1, Matn1, Col9a1, Col2a1, 3110079O15Rik
"15" = "Hematopoietic", # Ptprc, Fcrla, Cd68,Cd14,Cd48,Cd244,Kit,Cd38
"16" = "Hematopoietic", # Ptprc, Fcrla, Cd68,Cd14,Cd48,Cd244,Kit,Cd38
"17" = "α-SMA+" # Rgs5, Kcnj8, Notch3, Acta2
)
# Save the specific identities
seurat_integrated <- StashIdent(object = seurat_integrated, save.name = "assigned_ident_specific")
# Restore numerical Cluster IDs to set identities for next renaming
Idents(object = seurat_integrated) <- "integrated_snn_res.0.5"
# Set More Identities for ratios (Col1a1 expressing vs other)
seurat_integrated <- RenameIdents(object = seurat_integrated,
"0" = "PreOB", # PreOB
"1" = "MatureOB", # MatureOB
"2" = "MatureOB", # MatureOB
"3" = "Other", # MSC
"4" = "Other", # Connective
"5" = "Chondrocyte", # PreOB_Chondrolike
"6" = "Other", # Macrophage
"7" = "PreOB", # PreOB
"8" = "LateOB", # LateOB
"9" = "Endothelial", # Endothelial
"10" = "MSC", # MSC
"11" = "ImmatureOB", # ImmatureOB
"12" = "Other", # α-SMA+
"13" = "Other", # pAS
"14" = "Chondrocyte", # Chondrocyte
"15" = "Other", # Hematopoietic
"16" = "Other", # Hematopoietic
"17" = "Other" # α-SMA+
)
seurat_integrated <- StashIdent(object = seurat_integrated, save.name = "assigned_ident_general")
# Restore numerical Cluster IDs
Idents(object = seurat_integrated) <- "integrated_snn_res.0.5"
# Set Ratio Calculating Identities
seurat_integrated <- RenameIdents(object = seurat_integrated,
"0" = "Col1a1", "1" = "Col1a1", "2" = "Col1a1", "3" = "Col1a1",
"4" = "Col1a1", "5" = "Col1a1", "6" = "Other", "7" = "Col1a1",
"8" = "Col1a1", "9" = "Other", "10" = "Col1a1", "11" = "Col1a1",
"12" = "Other", "13" = "Col1a1", "14" = "Other", "15" = "Other",
"16" = "Other", "17" = "Other")
seurat_integrated <- StashIdent(object = seurat_integrated, save.name = "assigned_ident_col1a1")
# Plot the UMAP and check different labels
#Idents(object = seurat_integrated) <- "integrated_snn_res.0.5"
Idents(object = seurat_integrated) <- "assigned_ident_general"
#Idents(object = seurat_integrated) <- "assigned_ident_specific"
#Idents(object = seurat_integrated) <- "assigned_ident_col1a1"
DimPlot(object = seurat_integrated, reduction = "umap", label = TRUE, label.size = 3, repel = TRUE)
ggsave("UMAP.tiff", device = "tiff", scale = 1.5, dpi = 300,
path ="~/Desktop/rstudio/jacobsen/results", width = 7, height = 5, units = "in")
Idents(object = seurat_integrated) <- "assigned_ident_col1a1"
seurat_subset_labeled <- SplitObject(seurat_integrated, split.by = "group")
seurat_subset_OE_col1a1 <- WhichCells(seurat_subset_labeled[["OE"]], assigned_idents_col1a1 = "Col1a1")
seurat_subset_OI_col1a1 <- WhichCells(seurat_subset_labeled[["OI"]], assigned_idents_col1a1 = "Col1a1")
seurat_subset_WT_col1a1 <- WhichCells(seurat_subset_labeled[["WT"]], assigned_idents_col1a1 = "Col1a1")
seurat_subset_RES_col1a1 <- WhichCells(seurat_subset_labeled[["RES"]], assigned_idents_col1a1 = "Col1a1")
seurat_subset_OE_col1a1_other <- WhichCells(seurat_subset_labeled[["OE"]], idents = "Other")
seurat_subset_OI_col1a1_other <- WhichCells(seurat_subset_labeled[["OI"]], idents = "Other")
seurat_subset_WT_col1a1_other <- WhichCells(seurat_subset_labeled[["WT"]], idents = "Other")
seurat_subset_RES_col1a1_other <- WhichCells(seurat_subset_labeled[["RES"]], idents = "Other")
len_OE_col1a1 <- length(seurat_subset_OE_col1a1)
len_OI_col1a1 <- length(seurat_subset_OI_col1a1)
len_WT_col1a1 <- length(seurat_subset_WT_col1a1)
len_RES_col1a1 <- length(seurat_subset_RES_col1a1)
len_OE_col1a1_other <- length(seurat_subset_OE_col1a1_other)
len_OI_col1a1_other <- length(seurat_subset_OI_col1a1_other)
len_WT_col1a1_other <- length(seurat_subset_WT_col1a1_other)
len_RES_col1a1_other <- length(seurat_subset_RES_col1a1_other)
length1c <- c(len_OE_col1a1,len_OI_col1a1,len_WT_col1a1,len_RES_col1a1) # chondrocytes
length2c <- c(len_OE_col1a1_other,len_OI_col1a1_other,len_WT_col1a1_other,len_RES_col1a1_other) # other
lenc <- data.frame(length1c,length2c)
colnames(lenc) <- c("Col1a1", "Other")
groups <- c("OE","OI","WT","RES")
lengthsc <- cbind(groups,lenc)
lengthsc
## groups Col1a1 Other
## 1 OE 1478 260
## 2 OI 8649 1749
## 3 WT 980 155
## 4 RES 2600 354
ratio_col_other <- lengthsc["Col1a1"] / (lengthsc["Other"]) # Col1a1+ to Other ratio
lengthsc1 <- cbind(groups,ratio_col_other)
colnames(lengthsc1) <- c("Groups","Col_Other_ratio")
g1c <- ggplot(lengthsc1, aes(x=groups, y=Col_Other_ratio)) + geom_point(size=4) +
ggtitle("Ratio of Col1a1+ Cells to Other") + xlab("") + ylab("Ratio")
g1c
metadata_clean_subset <- seurat_integrated@meta.data
# Plot
f3 <- metadata_clean_subset %>% ggplot(aes(x=group, fill=assigned_ident_col1a1)) + xlab("") +
geom_bar() + theme_classic() + ggtitle("Proportion of Cell Types") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) + xlab("")
f3
ggsave("Population_proportions.tiff", device = "tiff", scale = 1.5, dpi = 300,
path ="~/Desktop/rstudio/jacobsen/results", width = 7, height = 5, units = "in")
Idents(object = seurat_integrated) <- "assigned_ident_general"
seurat_subset_labeled <- subset(seurat_integrated, idents = "Other", invert = T)
seurat_subset2 <- SplitObject(seurat_subset_labeled, split.by = "group")
seurat_subset_OE_preOB <- WhichCells(seurat_subset2[["OE"]], idents = "PreOB")
seurat_subset_OI_preOB <- WhichCells(seurat_subset2[["OI"]], idents = "PreOB")
seurat_subset_WT_preOB <- WhichCells(seurat_subset2[["WT"]], idents = "PreOB")
seurat_subset_RES_preOB <- WhichCells(seurat_subset2[["RES"]], idents = "PreOB")
seurat_subset_OE_imOB <- WhichCells(seurat_subset2[["OE"]], idents = "ImmatureOB")
seurat_subset_OI_imOB <- WhichCells(seurat_subset2[["OI"]], idents = "ImmatureOB")
seurat_subset_WT_imOB <- WhichCells(seurat_subset2[["WT"]], idents = "ImmatureOB")
seurat_subset_RES_imOB <- WhichCells(seurat_subset2[["RES"]], idents = "ImmatureOB")
seurat_subset_OE_mOB <- WhichCells(seurat_subset2[["OE"]], idents = "MatureOB")
seurat_subset_OI_mOB <- WhichCells(seurat_subset2[["OI"]], idents = "MatureOB")
seurat_subset_WT_mOB <- WhichCells(seurat_subset2[["WT"]], idents = "MatureOB")
seurat_subset_RES_mOB <- WhichCells(seurat_subset2[["RES"]], idents = "MatureOB")
seurat_subset_OE_lOB <- WhichCells(seurat_subset2[["OE"]], idents = "LateOB")
seurat_subset_OI_lOB <- WhichCells(seurat_subset2[["OI"]], idents = "LateOB")
seurat_subset_WT_lOB <- WhichCells(seurat_subset2[["WT"]], idents = "LateOB")
seurat_subset_RES_lOB <- WhichCells(seurat_subset2[["RES"]], idents = "LateOB")
seurat_subset_OE_chondro <- WhichCells(seurat_subset2[["OE"]], idents = "Chondrocyte")
seurat_subset_OI_chondro <- WhichCells(seurat_subset2[["OI"]], idents = "Chondrocyte")
seurat_subset_WT_chondro <- WhichCells(seurat_subset2[["WT"]], idents = "Chondrocyte")
seurat_subset_RES_chondro <- WhichCells(seurat_subset2[["RES"]], idents = "Chondrocyte")
seurat_subset_OE_endo <- WhichCells(seurat_subset2[["OE"]], idents = "Endothelial")
seurat_subset_OI_endo <- WhichCells(seurat_subset2[["OI"]], idents = "Endothelial")
seurat_subset_WT_endo <- WhichCells(seurat_subset2[["WT"]], idents = "Endothelial")
seurat_subset_RES_endo <- WhichCells(seurat_subset2[["RES"]], idents = "Endothelial")
seurat_subset_OE_msc <- WhichCells(seurat_subset2[["OE"]], idents = "MSC")
seurat_subset_OI_msc <- WhichCells(seurat_subset2[["OI"]], idents = "MSC")
seurat_subset_WT_msc <- WhichCells(seurat_subset2[["WT"]], idents = "MSC")
seurat_subset_RES_msc <- WhichCells(seurat_subset2[["RES"]], idents = "MSC")
#Calcuate number of cells in each cell type
len_OE_chondro <- length(seurat_subset_OE_chondro)
len_OI_chondro <- length(seurat_subset_OI_chondro)
len_WT_chondro <- length(seurat_subset_WT_chondro)
len_RES_chondro <- length(seurat_subset_RES_chondro)
len_OE_imOB <- length(seurat_subset_OE_imOB)
len_OI_imOB <- length(seurat_subset_OI_imOB)
len_WT_imOB <- length(seurat_subset_WT_imOB)
len_RES_imOB <- length(seurat_subset_RES_imOB)
len_OE_preOB <- length(seurat_subset_OE_preOB)
len_OI_preOB <- length(seurat_subset_OI_preOB)
len_WT_preOB <- length(seurat_subset_WT_preOB)
len_RES_preOB <- length(seurat_subset_RES_preOB)
len_OE_mOB <- length(seurat_subset_OE_mOB)
len_OI_mOB <- length(seurat_subset_OI_mOB)
len_WT_mOB <- length(seurat_subset_WT_mOB)
len_RES_mOB <- length(seurat_subset_RES_mOB)
len_OE_lOB <- length(seurat_subset_OE_lOB)
len_OI_lOB <- length(seurat_subset_OI_lOB)
len_WT_lOB <- length(seurat_subset_WT_lOB)
len_RES_lOB <- length(seurat_subset_RES_lOB)
len_OE_endo <- length(seurat_subset_OE_endo)
len_OI_endo <- length(seurat_subset_OI_endo)
len_WT_endo <- length(seurat_subset_WT_endo)
len_RES_endo <- length(seurat_subset_RES_endo)
len_OE_msc <- length(seurat_subset_OE_msc)
len_OI_msc <- length(seurat_subset_OI_msc)
len_WT_msc <- length(seurat_subset_WT_msc)
len_RES_msc <- length(seurat_subset_RES_msc)
length1 <- c(len_OE_chondro,len_OI_chondro,len_WT_chondro,len_RES_chondro) # chondrocytes
length2 <- c(len_OE_imOB,len_OI_imOB,len_WT_imOB,len_RES_imOB) # immature OBs
length3 <- c(len_OE_preOB,len_OI_preOB,len_WT_preOB,len_RES_preOB) # preOBs
length4 <- c(len_OE_mOB,len_OI_mOB,len_WT_mOB,len_RES_mOB) # mature OBs
length5 <- c(len_OE_lOB,len_OI_lOB,len_WT_lOB,len_RES_lOB) # late OBs
length6 <- c(len_OE_endo,len_OI_endo,len_WT_endo,len_RES_endo) # endothelial cells
length7 <- c(len_OE_msc,len_OI_msc,len_WT_msc,len_RES_msc) # mesenchymal lineage cells
len <- data.frame(length1,length2,length3,length4,length5,length6,length7)
colnames(len) <- c("Chondrocyte", "ImmatureOB", "PreOB", "MatureOB", "LateOB", "Endothelial", "MSC")
groups <- c("OE","OI","WT","RES")
lengths <- cbind(groups,len,lengthsc[2],lengthsc[3])
write_rds(seurat_subset_labeled, path = "~/Desktop/rstudio/jacobsen/results/seurat_subset_labeled.rds")
seurat_subset_labeled <- readRDS("~/Desktop/rstudio/jacobsen/results/seurat_subset_labeled.rds")
DimPlot(object = seurat_integrated, reduction = "umap", label = TRUE, label.size = 3, repel = TRUE, split.by= "orig.ident")
Idents(object = seurat_subset_labeled) <- "assigned_ident_general"
seurat_subset_samples <- SplitObject(seurat_subset_labeled, split.by = "orig.ident")
seurat_subset_D8_preOB <- WhichCells(seurat_subset_samples[["D8"]], idents = "PreOB")
seurat_subset_D9_preOB <- WhichCells(seurat_subset_samples[["D9"]], idents = "PreOB")
seurat_subset_D10_preOB <- WhichCells(seurat_subset_samples[["D10"]], idents = "PreOB")
seurat_subset_D11_preOB <- WhichCells(seurat_subset_samples[["D11"]], idents = "PreOB")
seurat_subset_D12_preOB <- WhichCells(seurat_subset_samples[["D12"]], idents = "PreOB")
seurat_subset_E1_preOB <- WhichCells(seurat_subset_samples[["E1"]], idents = "PreOB")
seurat_subset_E2_preOB <- WhichCells(seurat_subset_samples[["E2"]], idents = "PreOB")
seurat_subset_E3_preOB <- WhichCells(seurat_subset_samples[["E3"]], idents = "PreOB")
seurat_subset_D8_imOB <- WhichCells(seurat_subset_samples[["D8"]], idents = "ImmatureOB")
seurat_subset_D9_imOB <- WhichCells(seurat_subset_samples[["D9"]], idents = "ImmatureOB")
seurat_subset_D10_imOB <- WhichCells(seurat_subset_samples[["D10"]], idents = "ImmatureOB")
seurat_subset_D11_imOB <- WhichCells(seurat_subset_samples[["D11"]], idents = "ImmatureOB")
seurat_subset_D12_imOB <- WhichCells(seurat_subset_samples[["D12"]], idents = "ImmatureOB")
seurat_subset_E1_imOB <- WhichCells(seurat_subset_samples[["E1"]], idents = "ImmatureOB")
seurat_subset_E2_imOB <- WhichCells(seurat_subset_samples[["E2"]], idents = "ImmatureOB")
seurat_subset_E3_imOB <- WhichCells(seurat_subset_samples[["E3"]], idents = "ImmatureOB")
seurat_subset_D8_mOB <- WhichCells(seurat_subset_samples[["D8"]], idents = "MatureOB")
seurat_subset_D9_mOB <- WhichCells(seurat_subset_samples[["D9"]], idents = "MatureOB")
seurat_subset_D10_mOB <- WhichCells(seurat_subset_samples[["D10"]], idents = "MatureOB")
seurat_subset_D11_mOB <- WhichCells(seurat_subset_samples[["D11"]], idents = "MatureOB")
seurat_subset_D12_mOB <- WhichCells(seurat_subset_samples[["D12"]], idents = "MatureOB")
seurat_subset_E1_mOB <- WhichCells(seurat_subset_samples[["E1"]], idents = "MatureOB")
seurat_subset_E2_mOB <- WhichCells(seurat_subset_samples[["E2"]], idents = "MatureOB")
seurat_subset_E3_mOB <- WhichCells(seurat_subset_samples[["E3"]], idents = "MatureOB")
seurat_subset_D8_lOB <- WhichCells(seurat_subset_samples[["D8"]], idents = "LateOB")
seurat_subset_D9_lOB <- WhichCells(seurat_subset_samples[["D9"]], idents = "LateOB")
seurat_subset_D10_lOB <- WhichCells(seurat_subset_samples[["D10"]], idents = "LateOB")
seurat_subset_D11_lOB <- WhichCells(seurat_subset_samples[["D11"]], idents = "LateOB")
seurat_subset_D12_lOB <- WhichCells(seurat_subset_samples[["D12"]], idents = "LateOB")
seurat_subset_E1_lOB <- WhichCells(seurat_subset_samples[["E1"]], idents = "LateOB")
seurat_subset_E2_lOB <- WhichCells(seurat_subset_samples[["E2"]], idents = "LateOB")
seurat_subset_E3_lOB <- WhichCells(seurat_subset_samples[["E3"]], idents = "LateOB")
seurat_subset_D8_chondro <- WhichCells(seurat_subset_samples[["D8"]], idents = "Chondrocyte")
seurat_subset_D9_chondro <- WhichCells(seurat_subset_samples[["D9"]], idents = "Chondrocyte")
seurat_subset_D10_chondro <- WhichCells(seurat_subset_samples[["D10"]], idents = "Chondrocyte")
seurat_subset_D11_chondro <- WhichCells(seurat_subset_samples[["D11"]], idents = "Chondrocyte")
seurat_subset_D12_chondro <- WhichCells(seurat_subset_samples[["D12"]], idents = "Chondrocyte")
seurat_subset_E1_chondro <- WhichCells(seurat_subset_samples[["E1"]], idents = "Chondrocyte")
seurat_subset_E2_chondro <- WhichCells(seurat_subset_samples[["E2"]], idents = "Chondrocyte")
seurat_subset_E3_chondro <- WhichCells(seurat_subset_samples[["E3"]], idents = "Chondrocyte")
seurat_subset_D8_endo <- WhichCells(seurat_subset_samples[["D8"]], idents = "Endothelial")
seurat_subset_D9_endo <- WhichCells(seurat_subset_samples[["D9"]], idents = "Endothelial")
seurat_subset_D10_endo <- WhichCells(seurat_subset_samples[["D10"]], idents = "Endothelial")
seurat_subset_D11_endo <- WhichCells(seurat_subset_samples[["D11"]], idents = "Endothelial")
seurat_subset_D12_endo <- WhichCells(seurat_subset_samples[["D12"]], idents = "Endothelial")
seurat_subset_E1_endo <- WhichCells(seurat_subset_samples[["E1"]], idents = "Endothelial")
seurat_subset_E2_endo <- WhichCells(seurat_subset_samples[["E2"]], idents = "Endothelial")
seurat_subset_E3_endo <- WhichCells(seurat_subset_samples[["E3"]], idents = "Endothelial")
seurat_subset_D8_msc <- WhichCells(seurat_subset_samples[["D8"]], idents = "MSC")
seurat_subset_D9_msc <- WhichCells(seurat_subset_samples[["D9"]], idents = "MSC")
seurat_subset_D10_msc <- WhichCells(seurat_subset_samples[["D10"]], idents = "MSC")
seurat_subset_D11_msc <- WhichCells(seurat_subset_samples[["D11"]], idents = "MSC")
seurat_subset_D12_msc <- WhichCells(seurat_subset_samples[["D12"]], idents = "MSC")
seurat_subset_E1_msc <- WhichCells(seurat_subset_samples[["E1"]], idents = "MSC")
seurat_subset_E2_msc <- WhichCells(seurat_subset_samples[["E2"]], idents = "MSC")
seurat_subset_E3_msc <- WhichCells(seurat_subset_samples[["E3"]], idents = "MSC")
len_D8_preOB <- length(seurat_subset_D8_preOB)
len_D9_preOB <- length(seurat_subset_D9_preOB)
len_D10_preOB <- length(seurat_subset_D10_preOB)
len_D11_preOB <- length(seurat_subset_D11_preOB)
len_D12_preOB <- length(seurat_subset_D12_preOB)
len_E1_preOB <- length(seurat_subset_E1_preOB)
len_E2_preOB <- length(seurat_subset_E2_preOB)
len_E3_preOB <- length(seurat_subset_E3_preOB)
len_D8_imOB <- length(seurat_subset_D8_imOB)
len_D9_imOB <- length(seurat_subset_D9_imOB)
len_D10_imOB <- length(seurat_subset_D10_imOB)
len_D11_imOB <- length(seurat_subset_D11_imOB)
len_D12_imOB <- length(seurat_subset_D12_imOB)
len_E1_imOB <- length(seurat_subset_E1_imOB)
len_E2_imOB <- length(seurat_subset_E2_imOB)
len_E3_imOB <- length(seurat_subset_E3_imOB)
len_D8_mOB <- length(seurat_subset_D8_mOB)
len_D9_mOB <- length(seurat_subset_D9_mOB)
len_D10_mOB <- length(seurat_subset_D10_mOB)
len_D11_mOB <- length(seurat_subset_D11_mOB)
len_D12_mOB <- length(seurat_subset_D12_mOB)
len_E1_mOB <- length(seurat_subset_E1_mOB)
len_E2_mOB <- length(seurat_subset_E2_mOB)
len_E3_mOB <- length(seurat_subset_E3_mOB)
len_D8_lOB <- length(seurat_subset_D8_lOB)
len_D9_lOB <- length(seurat_subset_D9_lOB)
len_D10_lOB <- length(seurat_subset_D10_lOB)
len_D11_lOB <- length(seurat_subset_D11_lOB)
len_D12_lOB <- length(seurat_subset_D12_lOB)
len_E1_lOB <- length(seurat_subset_E1_lOB)
len_E2_lOB <- length(seurat_subset_E2_lOB)
len_E3_lOB <- length(seurat_subset_E3_lOB)
len_D8_chondro <- length(seurat_subset_D8_chondro)
len_D9_chondro <- length(seurat_subset_D9_chondro)
len_D10_chondro <- length(seurat_subset_D10_chondro)
len_D11_chondro <- length(seurat_subset_D11_chondro)
len_D12_chondro <- length(seurat_subset_D12_chondro)
len_E1_chondro <- length(seurat_subset_E1_chondro)
len_E2_chondro <- length(seurat_subset_E2_chondro)
len_E3_chondro <- length(seurat_subset_E3_chondro)
len_D8_endo <- length(seurat_subset_D8_endo)
len_D9_endo <- length(seurat_subset_D9_endo)
len_D10_endo <- length(seurat_subset_D10_endo)
len_D11_endo <- length(seurat_subset_D11_endo)
len_D12_endo <- length(seurat_subset_D12_endo)
len_E1_endo <- length(seurat_subset_E1_endo)
len_E2_endo <- length(seurat_subset_E2_endo)
len_E3_endo <- length(seurat_subset_E3_endo)
len_D8_msc <- length(seurat_subset_D8_msc)
len_D9_msc <- length(seurat_subset_D9_msc)
len_D10_msc <- length(seurat_subset_D10_msc)
len_D11_msc <- length(seurat_subset_D11_msc)
len_D12_msc <- length(seurat_subset_D12_msc)
len_E1_msc <- length(seurat_subset_E1_msc)
len_E2_msc <- length(seurat_subset_E2_msc)
len_E3_msc <- length(seurat_subset_E3_msc)
# Create an All OB column
len_D8_AllOB <- len_D8_preOB + len_D8_imOB + len_D8_mOB + len_D8_lOB
len_D9_AllOB <- len_D9_preOB + len_D9_imOB + len_D9_mOB + len_D9_lOB
len_D10_AllOB <- len_D10_preOB + len_D10_imOB + len_D10_mOB + len_D10_lOB
len_D11_AllOB <- len_D11_preOB + len_D11_imOB + len_D11_mOB + len_D11_lOB
len_D12_AllOB <- len_D12_preOB + len_D12_imOB + len_D12_mOB + len_D12_lOB
len_E1_AllOB <- len_E1_preOB + len_E1_imOB + len_E1_mOB + len_E1_lOB
len_E2_AllOB <- len_E2_preOB + len_E2_imOB + len_E2_mOB + len_E2_lOB
len_E3_AllOB <- len_E3_preOB + len_E3_imOB + len_E3_mOB + len_E3_lOB
AllOB <- c(len_D8_AllOB,len_D9_AllOB,len_D10_AllOB,len_D11_AllOB,len_D12_AllOB,len_E1_AllOB,len_E2_AllOB,len_E3_AllOB)
mat1 <- matrix(, nrow=9, ncol=8, byrow=TRUE)
rownames(mat1) <- c("Sample","Group","PreOB","ImmatureOB","MatureOB","LateOB","Chondrocyte","Endothelial","MSC")
colnames(mat1) <- c("D8","D9","D10","D11","D12","E1","E2","E3")
mat1 <- rbind(mat1,AllOB)
mat1 <- mat1[c(1,2,3,4,5,6,10,7,8,9),c(1:8)]
mat1
## D8 D9 D10 D11 D12 E1 E2 E3
## Sample NA NA NA NA NA NA NA NA
## Group NA NA NA NA NA NA NA NA
## PreOB NA NA NA NA NA NA NA NA
## ImmatureOB NA NA NA NA NA NA NA NA
## MatureOB NA NA NA NA NA NA NA NA
## LateOB NA NA NA NA NA NA NA NA
## AllOB 339 1738 1450 258 408 512 1004 1355
## Chondrocyte NA NA NA NA NA NA NA NA
## Endothelial NA NA NA NA NA NA NA NA
## MSC NA NA NA NA NA NA NA NA
PreOB <- c(len_D8_preOB, len_D9_preOB, len_D10_preOB, len_D11_preOB, len_D12_preOB, len_E1_preOB, len_E2_preOB, len_E3_preOB)
ImOB <- c(len_D8_imOB, len_D9_imOB, len_D10_imOB, len_D11_imOB, len_D12_imOB, len_E1_imOB, len_E2_imOB, len_E3_imOB)
MOB <- c(len_D8_mOB, len_D9_mOB, len_D10_mOB, len_D11_mOB, len_D12_mOB, len_E1_mOB, len_E2_mOB, len_E3_mOB)
LOB <- c(len_D8_lOB, len_D9_lOB, len_D10_lOB, len_D11_lOB, len_D12_lOB, len_E1_lOB, len_E2_lOB, len_E3_lOB)
Chon <- c(len_D8_chondro, len_D9_chondro, len_D10_chondro, len_D11_chondro, len_D12_chondro, len_E1_chondro, len_E2_chondro, len_E3_chondro)
Endo <- c(len_D8_endo, len_D9_endo, len_D10_endo, len_D11_endo, len_D12_endo, len_E1_endo, len_E2_endo, len_E3_endo)
MSC <- c(len_D8_msc, len_D9_msc, len_D10_msc, len_D11_msc, len_D12_msc, len_E1_msc, len_E2_msc, len_E3_msc)
mat1[3,] <- PreOB ;
mat1[4,] <- ImOB ;
mat1[5,] <- MOB ;
mat1[6,] <- LOB ;
mat1[8,] <- Chon ;
mat1[9,] <- Endo ;
mat1[10,] <- MSC ;
mat2 <- t(mat1)
mat2 <- as.matrix(mat2, stringsAsFactors = F)
Sample <- c("D8","D9","D10","D11","D12","E1","E2","E3")
Group <- c("OE","OI","RES","OE", "OI", "WT","OI","OI")
mat2 <- cbind(mat2,Group,Sample)
mat3 <- mat2[c(1:8),c(12,11,3,4,5,6,7,8,9,10)]
mat3 <- as.data.frame(mat3, stringsAsFactors = F)
mat3["PreOB"] <- as.numeric(mat3$PreOB)
mat3["ImmatureOB"] <- as.numeric(mat3$ImmatureOB)
mat3["MatureOB"] <- as.numeric(mat3$MatureOB)
mat3["LateOB"] <- as.numeric(mat3$LateOB)
mat3["AllOB"] <- as.numeric(mat3$AllOB)
mat3["Chondrocyte"] <- as.numeric(mat3$Chondrocyte)
mat3["Endothelial"] <- as.numeric(mat3$Endothelial)
mat3["MSC"] <- as.numeric(mat3$MSC)
#str(mat3)
#Normalize Cell Population numbers by chondrocytes
mat3["ratio_chondro_OB"] <- mat3["Chondrocyte"] / mat3["AllOB"] # Generate the Chondrocyte/OB normalization ratio
mat3["norm_PreOB"] <- mat3["PreOB"] * mat3["ratio_chondro_OB"]
mat3["norm_ImmatureOB"] <- mat3["ImmatureOB"] * mat3["ratio_chondro_OB"]
mat3["norm_MatureOB"] <- mat3["MatureOB"] * mat3["ratio_chondro_OB"]
mat3["norm_LateOB"] <- mat3["LateOB"] * mat3["ratio_chondro_OB"]
mat3["norm_AllOB"] <- mat3["AllOB"] * mat3["ratio_chondro_OB"]
mat3["norm_Chondrocyte"] <- mat3["Chondrocyte"] * mat3["ratio_chondro_OB"]
mat3["norm_Endothelial"] <- mat3["Endothelial"] * mat3["ratio_chondro_OB"]
mat3["norm_MSC"] <- mat3["MSC"] * mat3["ratio_chondro_OB"]
#Determine ratios
mat3["ratio_msc_preOB"] <- mat3["norm_MSC"] / mat3["PreOB"] # MSC/PreOB
mat3["ratio_preOB_imOB"] <- mat3["norm_PreOB"] / mat3["ImmatureOB"] # PreOB/ImmatureOBs
mat3["ratio_imOB_mOB"] <- mat3["norm_ImmatureOB"] / mat3["MatureOB"] # ImmatureOBs/Mature OBS
mat3["ratio_mOB_lOB"] <- mat3["norm_MatureOB"] / mat3["LateOB"] # MatureOBs/Late OBS
mat3["ratio_msc_OB"] <- mat3["norm_MSC"] / mat3["AllOB"] # MSC/OB
mat3["ratio_chondro_OB"] <- mat3["norm_Chondrocyte"] / mat3["AllOB"] # Chondrocyte/OB
mat3["ratio_endo_OB"] <- mat3["norm_Endothelial"] / mat3["AllOB"] # Endothelial/OB
mat3
## Sample Group PreOB ImmatureOB MatureOB LateOB AllOB Chondrocyte Endothelial
## D8 D8 OE 272 20 36 11 339 33 2
## D9 D9 OI 336 143 1129 130 1738 217 125
## D10 D10 RES 447 123 785 95 1450 205 103
## D11 D11 OE 198 8 40 12 258 23 6
## D12 D12 OI 216 26 103 63 408 168 41
## E1 E1 WT 149 68 236 59 512 94 54
## E2 E2 OI 492 41 399 72 1004 225 81
## E3 E3 OI 662 65 527 101 1355 274 103
## MSC ratio_chondro_OB norm_PreOB norm_ImmatureOB norm_MatureOB norm_LateOB
## D8 109 0.009476075 26.47788 1.9469027 3.504425 1.070796
## D9 51 0.015589060 41.95167 17.8544304 140.962601 16.231300
## D10 60 0.019988109 63.19655 17.3896552 110.982759 13.431034
## D11 88 0.007947239 17.65116 0.7131783 3.565891 1.069767
## D12 37 0.169550173 88.94118 10.7058824 42.411765 25.941176
## E1 32 0.033706665 27.35547 12.4843750 43.328125 10.832031
## E2 58 0.050222417 110.25896 9.1882470 89.417331 16.135458
## E3 78 0.040890511 133.86568 13.1439114 106.566790 20.423616
## norm_AllOB norm_Chondrocyte norm_Endothelial norm_MSC ratio_msc_preOB
## D8 33 3.212389 0.1946903 10.610619 0.03900963
## D9 217 27.093786 15.6070196 6.367664 0.01895138
## D10 205 28.982759 14.5620690 8.482759 0.01897709
## D11 23 2.050388 0.5348837 7.844961 0.03962102
## D12 168 69.176471 16.8823529 15.235294 0.07053377
## E1 94 17.257812 9.9140625 5.875000 0.03942953
## E2 225 50.423307 18.1523904 12.998008 0.02641872
## E3 274 55.406642 20.8280443 15.772694 0.02382582
## ratio_preOB_imOB ratio_imOB_mOB ratio_mOB_lOB ratio_msc_OB ratio_endo_OB
## D8 1.3238938 0.05408063 0.3185841 0.031299762 0.0005743076
## D9 0.2933683 0.01581438 1.0843277 0.003663788 0.0089798732
## D10 0.5137931 0.02215243 1.1682396 0.005850178 0.0100428062
## D11 2.2063953 0.01782946 0.2971576 0.030406827 0.0020731927
## D12 3.4208145 0.10394061 0.6732026 0.037341407 0.0413783160
## E1 0.4022863 0.05289989 0.7343750 0.011474609 0.0193634033
## E2 2.6892430 0.02302819 1.2419074 0.012946223 0.0180800702
## E3 2.0594720 0.02494101 1.0551167 0.011640364 0.0153712504
write.csv(mat3, "~/Desktop/rstudio/jacobsen/results/OI_cellpop_ratios.csv")
box1 <- ggplot(mat3, aes(factor(Group), mat3$ratio_msc_preOB)) +
geom_boxplot(aes(fill = factor(Group)), lwd=0.5) +
geom_jitter(width = 0.065) + ggtitle("Ratio of MSC to PreOB (Normalized to Chondro:OB Ratio)") +
xlab("") + ylab("Ratio") + guides(fill=FALSE) +
theme(plot.title = element_text(hjust = 0.5, size=10, face="bold")) +
theme(axis.text=element_text(size=10, face="bold"), axis.title=element_text(size=10)) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median)
anova1 <- rowMeans(data.frame(mat3$ratio_msc_preOB, na.rm=T))
summary(aov17 <- aov(anova1 ~ mat3$Group))
## Df Sum Sq Mean Sq F value Pr(>F)
## mat3$Group 3 0.0000781 2.603e-05 0.242 0.863
## Residuals 4 0.0004297 1.074e-04
TukeyHSD(aov17)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = anova1 ~ mat3$Group)
##
## $`mat3$Group`
## diff lwr upr p adj
## OI-OE -2.191451e-03 -0.03873230 0.03434939 0.9940919
## RES-OE -1.016912e-02 -0.06184568 0.04150744 0.8513331
## WT-OE 5.710341e-05 -0.05161946 0.05173366 1.0000000
## RES-OI -7.977667e-03 -0.05515169 0.03919636 0.8966221
## WT-OI 2.248554e-03 -0.04492547 0.04942258 0.9969916
## WT-RES 1.022622e-02 -0.04944473 0.06989717 0.8931754
box2 <- ggplot(mat3, aes(factor(Group), mat3$ratio_preOB_imOB)) +
geom_boxplot(aes(fill = factor(Group)), lwd=0.5) +
geom_jitter(width = 0.065) + ggtitle("Ratio of PreOB to ImmatureOB (Normalized to Chondro:OB Ratio)") +
xlab("") + ylab("Ratio") + guides(fill=FALSE) +
theme(plot.title = element_text(hjust = 0.5, size=10, face="bold")) +
theme(axis.text=element_text(size=10, face="bold"), axis.title=element_text(size=10)) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median)
anova2 <- rowMeans(data.frame(mat3$ratio_preOB_imOB, na.rm=T))
summary(aov17 <- aov(anova2 ~ mat3$Group))
## Df Sum Sq Mean Sq F value Pr(>F)
## mat3$Group 3 0.9328 0.3109 0.866 0.528
## Residuals 4 1.4364 0.3591
TukeyHSD(aov17)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = anova2 ~ mat3$Group)
##
## $`mat3$Group`
## diff lwr upr p adj
## OI-OE 0.1752899 -1.937367 2.287947 0.9848878
## RES-OE -0.6256757 -3.613424 2.362073 0.8286364
## WT-OE -0.6814291 -3.669178 2.306319 0.7933084
## RES-OI -0.8009657 -3.528394 1.926463 0.6603826
## WT-OI -0.8567191 -3.584148 1.870710 0.6187042
## WT-RES -0.0557534 -3.505708 3.394201 0.9998802
box3 <- ggplot(mat3, aes(factor(Group), mat3$ratio_imOB_mOB)) +
geom_boxplot(aes(fill = factor(Group)), lwd=0.5) +
geom_jitter(width = 0.065) + ggtitle("Ratio of ImmatureOB to MatureOB (Normalized to Chondro:OB Ratio)") +
xlab("") + ylab("Ratio") + guides(fill=FALSE) +
theme(plot.title = element_text(hjust = 0.5, size=10, face="bold")) +
theme(axis.text=element_text(size=10, face="bold"), axis.title=element_text(size=10)) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median)
anova3 <- rowMeans(data.frame(mat3$ratio_imOB_mOB, na.rm=T))
summary(aov17 <- aov(anova3 ~ mat3$Group))
## Df Sum Sq Mean Sq F value Pr(>F)
## mat3$Group 3 0.0001323 0.0000441 0.121 0.943
## Residuals 4 0.0014576 0.0003644
TukeyHSD(aov17)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = anova3 ~ mat3$Group)
##
## $`mat3$Group`
## diff lwr upr p adj
## OI-OE 0.002988001 -0.06431004 0.07028604 0.9975607
## RES-OE -0.006901308 -0.10207511 0.08827250 0.9897390
## WT-OE 0.008472425 -0.08670138 0.10364623 0.9815365
## RES-OI -0.009889309 -0.09677071 0.07699209 0.9633377
## WT-OI 0.005484425 -0.08139698 0.09236583 0.9931386
## WT-RES 0.015373734 -0.09452351 0.12527098 0.9362523
box4 <- ggplot(mat3, aes(factor(Group), mat3$ratio_mOB_lOB)) +
geom_boxplot(aes(fill = factor(Group)), lwd=0.5) +
geom_jitter(width = 0.065) + ggtitle("Ratio of MatureOB to LateOB (Normalized to Chondro:OB Ratio)") +
xlab("") + ylab("Ratio") + guides(fill=FALSE) +
theme(plot.title = element_text(hjust = 0.5, size=10, face="bold")) +
theme(axis.text=element_text(size=10, face="bold"), axis.title=element_text(size=10)) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median)
anova4 <- rowMeans(data.frame(mat3$ratio_mOB_lOB, na.rm=T))
summary(aov17 <- aov(anova4 ~ mat3$Group))
## Df Sum Sq Mean Sq F value Pr(>F)
## mat3$Group 3 0.20078 0.06693 6.121 0.0563 .
## Residuals 4 0.04374 0.01093
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(aov17)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = anova4 ~ mat3$Group)
##
## $`mat3$Group`
## diff lwr upr p adj
## OI-OE 0.35288388 -0.01576552 0.7215333 0.0574579
## RES-OE 0.43018436 -0.09116462 0.9515333 0.0903001
## WT-OE 0.21325208 -0.30809690 0.7346011 0.4419549
## RES-OI 0.07730048 -0.39862384 0.5532248 0.9065004
## WT-OI -0.13963180 -0.61555613 0.3362925 0.6609569
## WT-RES -0.21693228 -0.81893423 0.3850697 0.5282308
box5 <- ggplot(mat3, aes(factor(Group), mat3$ratio_endo_OB)) +
geom_boxplot(aes(fill = factor(Group)), lwd=0.5) +
geom_jitter(width = 0.065) + ggtitle("Ratio of Endothelial to OB (Normalized to Chondro:OB Ratio)") +
xlab("") + ylab("Ratio") + guides(fill=FALSE) +
theme(plot.title = element_text(hjust = 0.5, size=10, face="bold")) +
theme(axis.text=element_text(size=10, face="bold"), axis.title=element_text(size=10)) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median)
anova5 <- rowMeans(data.frame(mat3$ratio_endo_OB, na.rm=T))
summary(aov17 <- aov(anova5 ~ mat3$Group))
## Df Sum Sq Mean Sq F value Pr(>F)
## mat3$Group 3 0.0001393 4.644e-05 1.236 0.407
## Residuals 4 0.0001503 3.757e-05
TukeyHSD(aov17)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = anova5 ~ mat3$Group)
##
## $`mat3$Group`
## diff lwr upr p adj
## OI-OE 0.0098143136 -0.01179412 0.03142275 0.3719853
## RES-OE 0.0043595280 -0.02619941 0.03491846 0.9329037
## WT-OE 0.0090198266 -0.02153911 0.03957876 0.6573239
## RES-OI -0.0054547856 -0.03335115 0.02244158 0.8535111
## WT-OI -0.0007944871 -0.02869085 0.02710188 0.9993481
## WT-RES 0.0046602986 -0.03062612 0.03994672 0.9452315
box6 <- ggplot(mat3, aes(factor(Group), mat3$ratio_chondro_OB)) +
geom_boxplot(aes(fill = factor(Group)), lwd=0.5) +
geom_jitter(width = 0.065) + ggtitle("Ratio of Chondrocytes to OB (Normalized to Chondro:OB Ratio)") +
xlab("") + ylab("Ratio") + guides(fill=FALSE) +
theme(plot.title = element_text(hjust = 0.5, size=10, face="bold")) +
theme(axis.text=element_text(size=10, face="bold"), axis.title=element_text(size=10)) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median)
anova6 <- rowMeans(data.frame(mat3$ratio_chondro_OB, na.rm=T))
summary(aov17 <- aov(anova6 ~ mat3$Group))
## Df Sum Sq Mean Sq F value Pr(>F)
## mat3$Group 3 0.001421 0.0004736 0.537 0.682
## Residuals 4 0.003527 0.0008817
TukeyHSD(aov17)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = anova6 ~ mat3$Group)
##
## $`mat3$Group`
## diff lwr upr p adj
## OI-OE 0.030175692 -0.07450654 0.1348579 0.6714964
## RES-OE 0.005638226 -0.14240481 0.1536813 0.9984518
## WT-OE 0.012497504 -0.13554553 0.1605405 0.9841236
## RES-OI -0.024537465 -0.15968165 0.1106067 0.8770730
## WT-OI -0.017678188 -0.15282237 0.1174660 0.9466113
## WT-RES 0.006859278 -0.16408610 0.1778047 0.9981925
ggarrange(box1,box2,box3,box4,box5,box6, ncol = 3, nrow = 2)
ggsave("Cell_population_ratios.tiff", device = "tiff", scale = 1.5, dpi = 300,
path ="~/Desktop/rstudio/jacobsen/results", width = 7, height = 5, units = "in")
# Restore numerical Cluster IDs
# Idents(object = seurat_integrated) <- "integrated_snn_res.0.5"
# Test for DE features using the MAST package
# head(FindMarkers(seurat_integrated, ident.1 = "1", ident.2 = "2", test.use = "MAST"))
# # Extract raw counts and metadata to create SingleCellExperiment object
# counts <- seurat_integrated@assays$RNA@counts
#
# metadata <- seurat_integrated@meta.data
#
# # Set up metadata as desired for aggregation and DE analysis
# metadata$cluster_id <- factor(seurat_integrated@active.ident)
#
# # Create single cell experiment object
# sce <- SingleCellExperiment(assays = list(counts = counts), colData = metadata)
#
# # Identify groups for aggregation of counts
# groups <- colData(sce)[, c("cluster_id", "group")]
#
# dds_OI <- GetAssay(object = filtered_seurat, slots = c("counts","features"), assay="RNA") # Extract Counts
#
# OI_counts <- dds_OI@counts@p
# OI_features <- dds_OI@counts@Dimnames[[1]]
# OI_cellIDS <- dds_OI@counts@Dimnames[[2]]
#
# #OI_data <- (OI_counts,OI_features,OI_cellIDS)
# #head(OI_data)
#
# meta_OI <- read.csv("~~/R/singlecell/singlecellcalv1OI_metadata.csv", stringsAsFactors=T)
# tx2gene <- read.delim("tx2gene_grcm38_mm9_v79_wtransgenes_24Jul20.txt")
#
# meta_names_all <- factor(c("D8","D9","D10","D11","D12","E1","E2","E3"))
# rownames(meta_OI) <- meta_names_all
#
# # Does data in x match data in y?
# print("Does data in txi counts match data in metadata?")
# all(colnames(txi_all$counts) %in% rownames(meta_OI))
#
# # Is data in the same order in x as y?
# print("Are txi counts in the same order as in metadata?")
# all(colnames(txi_all$counts) == rownames(meta_OI))
#
# dds_OI <- DESeqDataSetFromTximport(OI_counts, colData = meta_OI, design = ~ group)
#
# dds_OI@colData@rownames
# Show(counts(dds_OI))
# dds_OI <- DESeqDataSetFromTximport(txi_OI, colData = meta_OI, design = ~ tissue)
# dds_OI <- estimateSizeFactors(dds_OI)
# sizeFactors(dds_OI)
# norm_counts_OI <- counts(dds_OI, normalized=TRUE)
# rld_bm_peri <- rlog(dds_OI, blind=TRUE)
# rld_mat_bm_peri <- assay(dds_OI)
# h1_plot2 <- pheatmap(rld_cor_OI, annotation = meta2, cluster_rows = TRUE, main="Heatmap")
# design(dds_OI) <- formula(~ tissue)
# dds_OI <- DESeq(dds_OI)
# res_bm_peri <- results(dds_OI)
# con_bm_peri <- c("group","OI","WT","RES","OE")
# results(dds_OI, contrast = con_OI)
# res_bm_peri <- results(dds_OI, contrast=con_bm_peri, alpha = 0.05)
# res_bm_peri %>% data.frame()